package com.xuxueli.crawler.loader.strategy;

import com.xuxueli.crawler.conf.XxlCrawlerConf;
import com.xuxueli.crawler.loader.PageLoader;
import com.xuxueli.crawler.model.PageRequest;
import com.xuxueli.crawler.util.UrlUtil;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.time.Duration;
import java.util.concurrent.TimeUnit;

/**
 * {@link PageLoader} that renders a page in a real Chrome browser driven by Selenium
 * and returns the resulting DOM as a Jsoup {@link Document}.
 *
 * <p>A new Chrome instance is started for every {@link #load(PageRequest)} call and is
 * always shut down before the method returns.
 */
public class SeleniumChromePageLoader extends PageLoader {
    private static final Logger logger = LoggerFactory.getLogger(SeleniumChromePageLoader.class);

    /** User-Agent string reported by the launched browser. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36";

    /** Filesystem path to the chromedriver executable; may be null/empty when it is on the system PATH. */
    private final String driverPath;

    /**
     * @param driverPath path to the chromedriver executable, or {@code null}/empty to rely
     *                   on the driver being discoverable without the system property
     */
    public SeleniumChromePageLoader(String driverPath) {
        this.driverPath = driverPath;
    }

    /**
     * Opens the request URL in Chrome, waits a fixed time for dynamic content to render,
     * and parses the rendered {@code <html>} element.
     *
     * @param pageRequest request whose URL is loaded
     * @return the parsed document, or {@code null} when the request URL is not a valid URL
     */
    @Override
    public Document load(PageRequest pageRequest) {
        String url = pageRequest.getUrl();
        if (!UrlUtil.isUrl(url)) {
            return null;
        }

        // Only point Selenium at an explicit driver binary when one was configured;
        // System.setProperty rejects a null value with a NullPointerException.
        if (driverPath != null && !driverPath.trim().isEmpty()) {
            System.setProperty("webdriver.chrome.driver", driverPath);
        }

        WebDriver webDriver = new ChromeDriver(buildOptions());
        try {
            webDriver.get(url);

            // Explicit/implicit Selenium waits were found ineffective for the target pages,
            // so a fixed pause is used to let dynamic content finish rendering.
            try {
                Thread.sleep(XxlCrawlerConf.TIMEOUT_MILLIS_DEFAULT);
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers can observe it, then continue
                // with whatever has been rendered so far.
                Thread.currentThread().interrupt();
                logger.warn("Interrupted while waiting for page to render, url={}", url, e);
            }

            WebElement root = webDriver.findElement(By.xpath("/html"));
            String content = root.getAttribute("outerHTML");

            // Supply the request URL as base URI so relative links in the document
            // can be resolved to absolute URLs.
            return Jsoup.parse(content, url);
        } finally {
            // Always close the browser and release the driver process.
            webDriver.quit();
        }
    }

    /** Builds the Chrome launch options used for every page load. */
    private static ChromeOptions buildOptions() {
        ChromeOptions options = new ChromeOptions();
        // Chrome switches are lowercase; "--User-Agent" is not recognised on
        // case-sensitive platforms, leaving the default User-Agent in place.
        options.addArguments("--user-agent=" + USER_AGENT);
        options.addArguments("--remote-allow-origins=*");
        options.addArguments("--window-size=1280,800");
        return options;
    }
}
